** This file: Imputes education over time and creates education variable ip1 


*** program setup
**********************************************************
* Interpret the rest of this do-file under Stata 14.2 rules
version 14.2
* Start from an empty session (data, programs, macros)
clear all
macro drop _all
* Output formatting and non-interactive execution (no -more- pauses)
set linesize 90
set more off
* set trace on
discard
* Fixed seed for the random-number generator.
* NOTE(review): no command in this file draws random numbers explicitly;
* presumably kept for reproducibility -- confirm before removing.
set seed 123456789
**********************************************************


forvalues c=0(1)14 {        /* Partition sample. More partitions reduce memory footprint but will decrease speed */
	/* Build the person-year panel for partition c.
	   For every year 1975-2007 the person identifier (vsnr), education (ausbild),
	   occupational status (berufstg) and age (alter) are read for the vsnr range
	   of this partition and stacked into data/temp1.dta. After the loop the
	   stacked panel is in memory and the temporary file is removed. */

	* The vsnr bounds depend only on c, so they are computed once per partition
	* instead of once per year.
	local lowerlimit = `c'*5000000
	local upperlimit = `c'*5000000+5000000

	forvalues y=1975(1)2007 {   /* Choose time period */

		di "year `y', c is `c'"
		di c(current_date)
		di c(current_time)

		* Forward slashes are used in the input path: they work on every platform
		* Stata runs on and match the other paths in this file.
		if `c'<14 {
			di "read all vsnr from " `lowerlimit' " until " `upperlimit'
			use vsnr ausbild berufstg alter if vsnr>=`lowerlimit' & vsnr<`upperlimit' using "data/orig/clean`y'.dta", clear
			su vsnr , detail
		}
		else if `c'==14 {
			* last partition: open-ended, takes every remaining vsnr
			di "read rest vsnr from " `lowerlimit' 
			use vsnr ausbild berufstg alter if vsnr>=`lowerlimit' using "data/orig/clean`y'.dta", clear
			su vsnr , detail
		}

		keep vsnr ausbild berufstg alter
		gen year=`y'

		compress

		* First year starts the stack; later years are added to what is on disk
		if `y'==1975 {
			save "data/temp1.dta", replace
		}
		else {
			append using "data/temp1.dta"
			save "data/temp1.dta", replace
		}

	}   /* end forvalues y=1975(1)2007 */
	
	* The full panel is still in memory; only the scratch file is deleted
	erase "data/temp1.dta"

	
	/* The following runs the imputation procedure by Fitzenberger, Osikominu and Voelter 
	(published in Schmollers Jahrbuch, 2006), adjusted to the data we are working with */

	/***********************************************************************************
	This program creates the improved education variable IP1 used in the paper
	"Imputation Rules to Improve the Education Variable in the IAB Employment Subsample"
	 by Bernd Fitzenberger, Aderonke Osikominu and Robert Voelter. The data is the IABS
	 regional file 1975-1997.
	(c) 2005 Bernd Fitzenberger, Aderonke Osikominu and Robert Voelter
	Goethe-Universitaet Frankfurt
	************************************************************************************/

	* Map our variable names onto the names used by the imputation code
	rename (vsnr berufstg ausbild alter) (PNR STIB BILD ALTER)
	tabulate BILD

	/* The cleanXXXX datasets carry no spell information, so stand-ins are built:
	     SPELL  - spell counter, here simply the year offset from 1975
	     TYP    - spell type, set to 1 (every record is treated as an employment spell)
	     NSPELL - number of records per person
	     BTYP   - set to 1 for every record (all are employment spells) */
	generate SPELL = year-1975
	generate TYP = 1
	compress
	egen NSPELL = total(TYP), by(PNR)
	generate BTYP = 1

	compress

	* SPELL2 counts the spells in reverse order
	generate int SPELL2 = -SPELL

	/* data preparation */
	* new education variable, 0 = not yet assigned
	generate byte IP1 = 0

	******************************
	* Change weak anonym. File
	******************************
	* Age is taken directly from the renamed variable alter. The original code
	* derived it from the year of birth and imputed censored birth years:
	*gen ALTER=AJAHR-GEBJAHR if GEBJAHR<89
	*by PNR: replace ALTER=15+AJAHR[_n]-AJAHR[1] if GEBJAHR==90
	*by PNR: replace ALTER=63+AJAHR[_n]-AJAHR[_N] if GEBJAHR==95
	sort PNR SPELL

	******************************
	* Change End
	******************************

	* a single code (-9) for missing education: BILD 7, 9 or system missing
	replace IP1 = -9 if inlist(BILD, 7, 9) | BILD==.

	/* +++++++++++
	   section 1
	   acceptance of spells
	   +++++++++++ */

	sort PNR SPELL

	* Special rule for young persons: below age 18 any formal education (2 to 6)
	* is implausible, so "no formal education" (1) is imputed
	replace IP1 = 1 if ALTER<18

	* ausbild==0 is coded as the lowest education level
	replace IP1 = 1 if BILD==0

	tabulate IP1

	/*
	Accept only education information from employment spells.
	Three types of spells:
	1) employment spells TYP~=6, BTYP==1
	2) technical spells TYP~=6, BTYP~=1
	3) UI benefit spells TYP==6

	Valid education information requires
	i) an employment spell AND
	ii) non-missing education information 1<=BILD<=6
	*/
	generate byte valid = TYP!=6 & BTYP==1 & inrange(BILD, 1, 6)

	* accept all valid information reported at age 18 or older
	replace IP1 = BILD if valid==1 & ALTER>=18

	/*
	Persons whose education information is missing at all spells:
	impute "vocational training degree" (2) where the employment status
	(STIB = 2 or 3) indicates a qualified job at the respective employment spell.

	NOTE(review): NMIS counts BILD 7 and 9 only, whereas the -9 recode earlier in
	the file also treats BILD==. as missing -- confirm this difference is intended.
	*/
	egen NMIS = total(BILD==7 | BILD==9), by(PNR)
	generate DMIS = NMIS==NSPELL

	sort PNR SPELL
	replace IP1 = 2 if inrange(STIB, 2, 3) & inrange(TYP, 1, 5) ///
		& BTYP==1 & DMIS==1 & ALTER>17

	* the statements that follow are semicolon-delimited
	#delimit ;


	/* +++++++++++++++++++++
	   section 2
	   extrapolation
	   +++++++++++++++++++++

	Part 1: extrapolation to following spells

	Within each person (by PNR, spells in ascending SPELL order) the value of the
	preceding spell is carried to the current spell when the current value is lower.
	replace works through the observations in order, so a value that was just
	carried forward is itself the "previous" value for the next spell. The six
	passes run one after another; each sees the result of the passes before it.*/

	sort PNR SPELL;

	/* previous spell is 1: fill spells that are still 0 or -9 */
	by PNR: replace IP1=1 if _n>1
								 & IP1[_n-1]==1
								 & IP1[_n]<1; /*only to missing values*/

	/* previous spell is 2: overwrite 0, -9 and 1 */
	by PNR: replace IP1=2 if _n>1
								 & IP1[_n-1]==2
								 & IP1[_n]<2; /*only to missing values and "1"*/

	/* previous spell is 3: overwrite 0, -9 and 1 (a current 2 is left alone) */
	by PNR: replace IP1=3 if _n>1
								 & IP1[_n-1]==3
								 & IP1[_n]<2; /*only to missing values and "1"*/

	/* set 4 when the previous spell is 4 and the current one is below 4, or when
	   previous and current spell are the pair 2/3 in either order */
	by PNR: replace IP1=4 if _n>1
								 & (IP1[_n-1]==4 | (IP1[_n-1]==2 & IP1[_n]==3) | (IP1[_n-1]==3 & IP1[_n]==2))
								 & IP1[_n]<4;

	/* previous spell is 5: overwrite everything below 5 */
	by PNR: replace IP1=5 if _n>1
								 & IP1[_n-1]==5
								 & IP1[_n]<5;

	/* previous spell is 6: overwrite everything below 6 */
	by PNR: replace IP1=6 if _n>1
								 & IP1[_n-1]==6
								 & IP1[_n]<6;



	/* part 2: backwards extrapolation to previous spells

	   Spells are ordered within person by SPELL2 (the negative of SPELL), i.e.
	   latest spell first, so IP1[_n-1] is the chronologically following spell.
	   A spell that still has no education value (0 or -9) takes the value of the
	   following spell, provided the person has reached the minimum age for that
	   level (29, 27, 23, 21, 20 for levels 6 to 2; no age limit for level 1).

	   The by-group must be the person: "by PNR (SPELL2), sort:" sorts on
	   PNR SPELL2 but groups on PNR only. With "by PNR SPELL2" every
	   person-spell combination is its own group, _n>1 is never true for
	   one record per person-year and nothing is extrapolated. */

	by PNR (SPELL2), sort: replace IP1=6 if
							 _n>1
							& (IP1[_n]==-9 | IP1[_n]==0)
							& IP1[_n-1]==6
							& ALTER[_n]>=29 ;

	by PNR (SPELL2), sort: replace IP1=5 if
							 _n>1
							& (IP1[_n]==-9 | IP1[_n]==0)
							& IP1[_n-1]==5
							& ALTER[_n]>=27 ;

	by PNR (SPELL2), sort: replace IP1=4 if
							_n>1
							& (IP1[_n]==-9 | IP1[_n]==0)
							& IP1[_n-1]==4
							& ALTER[_n]>=23 ;

	by PNR (SPELL2), sort: replace IP1=3 if
							_n>1
							& (IP1[_n]==-9 | IP1[_n]==0)
							& IP1[_n-1]==3
							& ALTER[_n]>=21 ;

	by PNR (SPELL2), sort: replace IP1=2 if
							_n>1
							& (IP1[_n]==-9 | IP1[_n]==0)
							& IP1[_n-1]==2
							& ALTER[_n]>=20;

	by PNR (SPELL2), sort: replace IP1=1 if
							_n>1
							& (IP1[_n]==-9 | IP1[_n]==0)
							& IP1[_n-1]==1;


	#delimit cr

	/* +++++++++++
	   section 3
	   +++++++++++
	   Collapse the two "no information" codes into one and reduce the data
	   to the identifier, the year and the imputed education variable. */

	sort PNR SPELL

	* single missing value: spells never assigned (0) become -9
	replace IP1 = -9 if IP1==0

	* frequency tables for the log
	foreach v in year IP1 BILD {
		tabulate `v'
	}

	* back to the naming of the source data
	rename (PNR IP1) (vsnr ip1)
	keep vsnr year ip1

	** Three-group education variable derived from the imputed ip1 **
	generate imp_edu = .
	* group 1: ip1 of 0 or 1, and every negative (missing) code --
	* all missing are coded as low skill
	replace imp_edu = 1 if ip1<0 | inlist(ip1, 0, 1)
	* group 2: ip1 of 2, 3 or 4
	replace imp_edu = 2 if inlist(ip1, 2, 3, 4)
	* group 3: ip1 of 5 or 6
	replace imp_edu = 3 if inlist(ip1, 5, 6)

	label define imp_edu 1 "[1] None or only a school degree" 2 "[2] High school and vocational" 3 "[3] Technical college / university"
	label values imp_edu imp_edu
	label variable imp_edu "imputed education"

	compress
	* Write one file per year for this partition; the file name is the year
	* followed by the partition number c
	forvalues y=1975(1)2007 {
		preserve
			keep if year==`y'
			sort vsnr year
			save "data/educationimputationall`y'`c'.dta", replace
		restore
	} /* end forvalues y */
} /* end foreach c */


* Merge across years y and blocks c
* For every year the 15 per-block files (c = 0..14) are stacked into
* data/educationimputationall<year>.dta. The per-block files are deleted only
* after the combined file has been saved, so a failure while appending or
* saving leaves the inputs in place and this section can simply be rerun.
forvalues y=1975(1)2007 {
	use "data/educationimputationall`y'0.dta", clear

	forvalues c=1(1)14 {
		append using "data/educationimputationall`y'`c'.dta"
	}
	* year is constant within the file and encoded in the file name
	drop year
	sort vsnr
	describe
	save "data/educationimputationall`y'.dta", replace

	* combined file is on disk: the per-block inputs are no longer needed
	forvalues c=0(1)14 {
		erase "data/educationimputationall`y'`c'.dta"
	}
}


clear all
exit
